06.06.2017
tidyCL<-read.csv("tortoises_tidy.csv", sep=";", header=TRUE)
colnames(tidyCL)[6] <- "MAmin"
colnames(tidyCL)[7] <- "Mamax"
colnames(tidyCL)[17] <- "CL"
colnames(tidyCL)[18] <- "PL"
statsCL <- tidyCL %>%
dplyr::filter(!is.na(CL)) %>%
summarise(min = min(CL), max = max(CL), var(CL), mean= mean(CL), median= median(CL))#, skew(CL), kurtosi(CL)) n = n(),
Map <- tidyCL %>%
dplyr::select(Genus, Taxon, Latitude, Longitude, Country, CL, PL) %>%
group_by(Latitude) %>%
mutate(count= n())
mapWorld <- borders("world", colour="azure3", fill="azure3") # create a layer of borders
mp <- Map %>%
ggplot(aes(Longitude, Latitude)) + mapWorld +
# geom_point(fill="red", colour="red", size=0.5) +
geom_point(aes(Longitude, Latitude,colour=CL, size=count))
mp
library(plotly)
ggplotly(mp)
Get an overview of the body size data
tidyCL <- tidyCL %>%
mutate(Age= (MAmin+Mamax)/2)
hist(tidyCL$CL)
hist(tidyCL$Age)
TO DO:
- map localities with differing colors for: CL available, CL extrapolated (from PL or figures), CL missing
- complete data set!
- get missing references/make list of missing references
08.06.17
Map all localities with sample size and age indicated (regardless of whether CL information is available):
test<-read.csv("tortoises13-04.csv", sep=";", header=TRUE)
colnames(test)[6] <- "Mamin"
colnames(test)[7] <- "Mamax"
Test <- test %>%
dplyr::select(Locality, Country, Latitude, Longitude, Mamin, Mamax, Epoch, Genus, Species, Taxon, CL) %>%
mutate(Age= (Mamin+Mamax)/2) %>% # create mean age
group_by(Latitude) %>%
mutate(count= n())
#mapWorld <- borders("world", colour="azure3", fill="azure3") # create a layer of borders
map <- Test %>%
ggplot(aes(Longitude, Latitude)) + mapWorld +
#geom_point(fill="red", colour="red", size=0.5) +
geom_point(aes(Longitude, Latitude,colour=Age, size=count))
map
ggplotly(map)
TO DO:
- get a general statistical overview of the data (structure, normal distribution?, mean/mode/median/min/max, histogram etc. –> see Catalina’s paper)
Try paleoTS with some first real data. Here is the underlying data:
tidyCL
Prepare data for conversion to paleoTS-object:
SampleSize <- tidyCL %>%
dplyr::select(MAmin, Mamax, CL) %>%
filter(CL != "NA")
length(SampleSize$CL)
TidyCL <- tidyCL %>%
dplyr::select(MAmin, Mamax, CL) %>%
dplyr::filter(CL != "NA") %>%
mutate(tt= (MAmin+Mamax)/2) %>% # create mean age
group_by(tt) %>% #create time bins
summarise(mm=mean(CL), vv=var(CL), nn=n()) #create means etc. for each time bin
TidyCL[is.na(TidyCL)]<-0 #subset NAs with O for
TidyCL
bins <- tidyCL %>%
# select(MAmin, Mamax, CL) %>%
filter(CL != "NA") %>%
mutate(tt= (MAmin+Mamax)/2) %>% # create mean age
group_by(tt)
bins
library(paleoTS)
paleoTidyCL <-as.paleoTS(TidyCL$mm, TidyCL$vv, TidyCL$nn, TidyCL$tt, MM = NULL, genpars = NULL, label = "Testudinidae body size evolution mode")
paleoTidyCL
plot(paleoTidyCL)
fit3models(paleoTidyCL, silent=FALSE, method="AD", pool=FALSE) #not working with Test1, because no variances/sample sizes available, I guess
15.06.2017
Use paleoTS with data from the past 10 million years (late Miocene through Pliocene to today)
unique(tidyCL$Epoch)
PleiPlioCL <- tidyCL %>%
filter(Age < 10.000)
length(PleiPlioCL$CL)
PPCL <- PleiPlioCL %>%
select(MAmin, Mamax, CL) %>%
filter(CL != "NA") %>%
mutate(tt= (MAmin+Mamax)/2) %>% # create mean age
group_by(tt) %>% #create time bins
summarise(mm=mean(CL), vv=var(CL), nn=n()) #create means etc. for each time bin
PPCL[is.na(PPCL)]<-0 #subset NAs with O for
PPCL
bins <- PleiPlioCL %>%
# select(MAmin, Mamax, CL) %>%
filter(CL != "NA") %>%
mutate(tt= (MAmin+Mamax)/2) %>% # create mean age
group_by(tt)
bins
paleoPPCL <-as.paleoTS(PPCL$mm, PPCL$vv, PPCL$nn, PPCL$tt, MM = NULL, genpars = NULL, label = "Testudinidae body size evolution mode")
paleoPPCL
plot(paleoPPCL)
fit3models(paleoPPCL, silent=FALSE, method="AD", pool=FALSE) #not working with Test1, because no variances/sample sizes available, I guess##### play around with speciesgeocodeR 07.06.17 #######
PPmap <- PleiPlioCL %>%
select(Genus, Taxon, Latitude, Longitude, Country, CL, PL, Age) %>%
group_by(Latitude) %>%
mutate(count= n()) %>%
ggplot(aes(Longitude, Latitude)) + mapWorld +
geom_point(aes(Longitude, Latitude,colour=Age, size=count))
PPmap
ggplotly(PPmap)
We recommend that you use the dev version of ggplot2 with `ggplotly()`
Install it with: `devtools::install_github('hadley/ggplot2')`
TO DO:
This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
---
title: "Body size trends in Neogene tortoises"
output:
  pdf_document: default
  html_notebook: default
  html_document: default
always_allow_html: yes
---

```{r "setup", include=FALSE}
# Load the packages used throughout the notebook. library() stops with an
# error when a package is missing, whereas require() only returns FALSE and
# lets the failure surface later in an unrelated chunk.
library(knitr)
# Evaluate all chunks relative to the project folder so that the relative CSV
# paths passed to read.csv() in later chunks resolve.
# NOTE(review): machine-specific network path; adjust when knitting elsewhere.
opts_knit$set(root.dir = "//naturkundemuseum-berlin.de/MuseumDFSRoot/Benutzer/Julia.Joos/Eigene Dateien/MA")
library(dplyr)
library(ggplot2)
```

# 30.05.2017

## TO DO:
* figure out whether the Checklist data are of any use (means? medians? sample size?) or whether the authors can provide the necessary data
* do paleoTS analyses with the FFB data set
* read the Hunt papers (see citations in Catalina's paper: 2006, 2008, 2008, 2010; also 2015)
* figure out how to implement phylogeny, i.e. how to do paleoTS analyses with more than one taxon without pooling everything together (as in Test2)


# 06.06.2017
```{r "Map fossil records with CL information available"}
# Interactive maps via ggplotly(); loaded first so that a missing package
# fails before any work is done.
library(plotly)

# Read the semicolon-separated specimen table.
tidyCL <- read.csv("tortoises_tidy.csv", sep = ";", header = TRUE)

# Give the columns used below short names. The positions are tied to the
# column order of the CSV file.
# NOTE(review): presumably columns 6/7 are minimum/maximum age and 17/18 are
# carapace/plastron length -- confirm against the file header.
colnames(tidyCL)[6] <- "MAmin"
colnames(tidyCL)[7] <- "Mamax"
colnames(tidyCL)[17] <- "CL"
colnames(tidyCL)[18] <- "PL"

# Summary statistics of CL over all records that have a CL value. Every
# column is named explicitly (the variance used to be labelled `var(CL)`).
# TODO: also report n = n(), skewness and kurtosis (skew(CL), kurtosi(CL)).
statsCL <- tidyCL %>%
  dplyr::filter(!is.na(CL)) %>%
  summarise(
    min = min(CL),
    max = max(CL),
    var = var(CL),
    mean = mean(CL),
    median = median(CL)
  )

# One row per record plus the number of records sharing its coordinates.
# Counting per Latitude/Longitude pair keeps sites that merely share a
# latitude from inflating each other's point size.
Map <- tidyCL %>%
  dplyr::select(Genus, Taxon, Latitude, Longitude, Country, CL, PL) %>%
  group_by(Latitude, Longitude) %>%
  mutate(count = n()) %>%
  ungroup()

# World outline layer; reused by the later mapping chunks.
mapWorld <- borders("world", colour = "azure3", fill = "azure3")

# Records on the world map: colour = CL, size = records per coordinate pair.
# The point layer inherits x/y from the ggplot() call.
mp <- Map %>%
  ggplot(aes(Longitude, Latitude)) +
  mapWorld +
  # alternative: geom_point(fill = "red", colour = "red", size = 0.5) +
  geom_point(aes(colour = CL, size = count))

mp

ggplotly(mp)

```

Get an overview of the body size data

```{r "Data structure"}
# Mean age of each record: midpoint of MAmin and Mamax.
tidyCL$Age <- (tidyCL$MAmin + tidyCL$Mamax) / 2

# Distributions of the CL values and of the mean ages.
hist(tidyCL$CL)
hist(tidyCL$Age)

```


## TO DO:
* map localities with differing colors for: CL available, CL extrapolated (from PL or figures), CL missing
* complete data set! 
  + get missing references/make list of missing references


# 08.06.17

Map all localities with sample size and age indicated (regardless of whether CL information is available):
```{r "Map all records indicating age and sample size, disregarding availability of CL"}
# Read the semicolon-separated locality table.
test <- read.csv("tortoises13-04.csv", sep = ";", header = TRUE)

# Short names for the age columns; the positions are tied to the column
# order of the CSV file.
colnames(test)[6] <- "Mamin"
colnames(test)[7] <- "Mamax"

# Keep the columns of interest, add the mean age of each record and the
# number of records sharing its coordinates. Counting per
# Latitude/Longitude pair keeps sites that merely share a latitude from
# inflating each other's point size.
Test <- test %>%
  dplyr::select(
    Locality, Country, Latitude, Longitude, Mamin, Mamax,
    Epoch, Genus, Species, Taxon, CL
  ) %>%
  mutate(Age = (Mamin + Mamax) / 2) %>% # midpoint of Mamin and Mamax
  group_by(Latitude, Longitude) %>%
  mutate(count = n()) %>%
  ungroup()

# mapWorld (world outline layer) must already exist in the session; it can
# be recreated with:
# mapWorld <- borders("world", colour = "azure3", fill = "azure3")

# All records on the world map: colour = mean age, size = records per
# coordinate pair. The point layer inherits x/y from the ggplot() call.
map <- Test %>%
  ggplot(aes(Longitude, Latitude)) +
  mapWorld +
  # alternative: geom_point(fill = "red", colour = "red", size = 0.5) +
  geom_point(aes(colour = Age, size = count))

map

ggplotly(map)
```

## TO DO:
* get a general statistical overview of the data (structure, normal distribution?, mean/mode/median/min/max, histogram etc. --> see Catalina's paper)



Try paleoTS with some first real data.
Here is the underlying data:

```{r "Get actual data"}
# Print the data set as it currently stands (columns renamed, Age added).
tidyCL

```

Prepare data for conversion to paleoTS-object:

```{r "Data preparation for analyses with paleoTS"}

# Records that have a CL measurement; missing values are tested with
# is.na() rather than compared against the string "NA".
SampleSize <- tidyCL %>%
  dplyr::select(MAmin, Mamax, CL) %>%
  dplyr::filter(!is.na(CL))

# Number of records with CL.
nrow(SampleSize)

# Per-time-bin summary with the four vectors handed to as.paleoTS():
#   tt = bin age (midpoint of MAmin and Mamax), mm = mean CL,
#   vv = variance of CL, nn = number of records in the bin.
TidyCL <- tidyCL %>%
  dplyr::select(MAmin, Mamax, CL) %>%
  dplyr::filter(!is.na(CL)) %>%
  mutate(tt = (MAmin + Mamax) / 2) %>%
  # Records without an age cannot be placed in a time bin; dropping them
  # avoids an NA bin that would otherwise be stored as age 0.
  dplyr::filter(!is.na(tt)) %>%
  group_by(tt) %>%
  summarise(mm = mean(CL), vv = var(CL), nn = n()) %>%
  # var() is NA for bins holding a single record; store 0 there. Only vv is
  # filled so that no other missing value is silently turned into a real 0.
  mutate(vv = coalesce(vv, 0))

TidyCL

# Record-level data grouped by time bin, to inspect what ends up in each bin.
bins <- tidyCL %>%
  dplyr::filter(!is.na(CL)) %>%
  mutate(tt = (MAmin + Mamax) / 2) %>%
  group_by(tt)

bins

```



```{r "Try paleoTS with actual data"}
library(paleoTS)

# Bundle the per-bin means, variances, sample sizes and ages into a paleoTS
# object.
# NOTE(review): tt holds ages and the bins come out ordered by ascending tt --
# confirm this matches the time direction as.paleoTS() expects.
paleoTidyCL <- as.paleoTS(
  mm = TidyCL$mm,
  vv = TidyCL$vv,
  nn = TidyCL$nn,
  tt = TidyCL$tt,
  MM = NULL,
  genpars = NULL,
  label = "Testudinidae body size evolution mode"
)
paleoTidyCL
plot(paleoTidyCL)

# Fit the models via fit3models().
# Did not work with Test1, presumably because no variances/sample sizes are
# available there.
fit3models(paleoTidyCL, silent = FALSE, method = "AD", pool = FALSE)

```


# 15.06.2017

Use paleoTS with data from the past 10 million years (late Miocene through Pliocene to today)

```{r "Body size trends from Pliocene/Miocene until today"}
# Epoch labels present in the data.
unique(tidyCL$Epoch)

# Keep records whose mean age (Age, the midpoint of MAmin and Mamax) is
# below 10. Written as 10 rather than 10.000, which R reads as the same
# number but which looks like a thousands separator.
PleiPlioCL <- tidyCL %>%
  dplyr::filter(Age < 10)

# Number of records in the subset.
nrow(PleiPlioCL)

# Per-time-bin summary with the four vectors handed to as.paleoTS():
#   tt = bin age, mm = mean CL, vv = variance of CL, nn = records per bin.
# Missing CL is tested with is.na() rather than compared against "NA".
PPCL <- PleiPlioCL %>%
  dplyr::select(MAmin, Mamax, CL) %>%
  dplyr::filter(!is.na(CL)) %>%
  mutate(tt = (MAmin + Mamax) / 2) %>%
  group_by(tt) %>%
  summarise(mm = mean(CL), vv = var(CL), nn = n()) %>%
  # var() is NA for bins holding a single record; store 0 there. Only vv is
  # filled so that no other missing value is silently turned into a real 0.
  mutate(vv = coalesce(vv, 0))

PPCL

# Record-level data grouped by time bin, to inspect what ends up in each bin.
bins <- PleiPlioCL %>%
  dplyr::filter(!is.na(CL)) %>%
  mutate(tt = (MAmin + Mamax) / 2) %>%
  group_by(tt)

bins

# Bundle the per-bin summary into a paleoTS object.
# NOTE(review): tt holds ages and the bins come out ordered by ascending tt --
# confirm this matches the time direction as.paleoTS() expects.
paleoPPCL <- as.paleoTS(
  PPCL$mm, PPCL$vv, PPCL$nn, PPCL$tt,
  MM = NULL, genpars = NULL,
  label = "Testudinidae body size evolution mode"
)
paleoPPCL
plot(paleoPPCL)

# Did not work with Test1, presumably because no variances/sample sizes are
# available there.
fit3models(paleoPPCL, silent = FALSE, method = "AD", pool = FALSE)

```


```{r}
# Map of the records in PleiPlioCL: colour = mean age, size = number of
# records per coordinate pair. Counting per Latitude/Longitude pair keeps
# sites that merely share a latitude from inflating each other's point size.
# select() is namespace-qualified so that another attached package exporting
# select() cannot take its place.
PPmap <- PleiPlioCL %>%
  dplyr::select(Genus, Taxon, Latitude, Longitude, Country, CL, PL, Age) %>%
  group_by(Latitude, Longitude) %>%
  mutate(count = n()) %>%
  ungroup() %>%
  ggplot(aes(Longitude, Latitude)) +
  mapWorld +
  geom_point(aes(colour = Age, size = count)) # x/y inherited from ggplot()

PPmap

ggplotly(PPmap)
```

## TO DO: 
* finish data set
* 

#######################################################################################################

This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code. 

Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Ctrl+Shift+Enter*. 

Add a new chunk by clicking the *Insert Chunk* button on the toolbar or by pressing *Ctrl+Alt+I*.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the *Preview* button or press *Ctrl+Shift+K* to preview the HTML file).
